library("phyloseq")
library("vegan")
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-8
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
suppressPackageStartupMessages(library(microViz))
library("ggplot2")
library(jsonlite)
library(patchwork)
library(stringr)
library(tidyr)
For resistance gene annotations CARD database was used.
CARDDB = "~/DB/CARD_02_2024/card.json"
METADATA_PATH = "../sampleMetadata.csv"
metadata <- read.csv(METADATA_PATH, header = TRUE, colClasses = c(Date = "character", Dairy_farming="character", Meat_production="character", Metal_processing="character", Washrooms="character"))
Read data resistance data.
AMR genes were classified in metagenome assemblies using Resistance gene finder.
Then genes were quantified by aligning short read sequencing reads to the assemblies and sequenceing read overlap was counted in positions with ht-seq.
ARG_SCAFFOLDS_F = "../Resistance_genes/AMR_genes_RGI_scaffolds_filtered.csv"
arg_t <- read.csv(ARG_SCAFFOLDS_F, header = TRUE, row.names = 1)
tax_t <- data.frame(Tax = rownames(arg_t), ARG = rownames(arg_t))
# tax_t <- sanitizeRowNames(tax_t)
rownames(tax_t) <- tax_t$Tax
ARG_TAX <- tax_table(as.matrix(tax_t[, c('Tax', 'ARG')]))
ps_amr <- phyloseq(otu_table(arg_t, taxa_are_rows = TRUE), sample_data(metadata), ARG_TAX)
# Fix Sample names
# Create a named vector for mapping
name_mapping <- setNames(metadata$Sample, rownames(metadata))
# Use the mapping to rename the columns
sample_names(ps_amr) <- name_mapping[sample_names(ps_amr)]
Display amount of sequencing reads alligning to a AMR genes found in contigs.
plot_bar(ps_amr)
## Warning in psmelt(physeq): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
options(repr.plot.width=4, repr.plot.height=4)
hist(log10(sample_sums(ps_amr)), breaks=50, main="Sample size distribution", xlab="Sample size (log10)", ylab="Frequency", col="#007c80")
### Filtering
Since we removed some samples from bacterial anlysis, we have to remove them also here.
ps_amr_good = subset_samples(ps_amr, sample_data(ps_amr)$Sample != "Salaspils.2")
ps_amr_good = subset_samples(ps_amr_good, sample_data(ps_amr_good)$Sample != "Talsi.3")
hist(log10(sample_sums(ps_amr_good)), breaks=50, main="Sample size distribution", xlab="Sample size (log10)", ylab="Frequency", col="#007c80")
Now we remove singleton reads that are assigned to genes with only one
alligned read across all samples.
# Less strict filterring option
# remove singletons.
ps_scaffolds_filtered = filter_taxa(ps_amr_good, function(x) sum(x) > 1, prune=TRUE)
max_difference = max(sample_sums(ps_scaffolds_filtered))/min(sample_sums(ps_scaffolds_filtered))
# The max difference in sequencing depth between largest and smallest sample
max_difference
## [1] 7.537805
boxplot(sample_sums(ps_scaffolds_filtered), main="Sequencing depth across samples removed singletons", xlab="", ylab="Number of reads", col="#a6093d")
text(y=boxplot.stats(sample_sums(ps_scaffolds_filtered))$stats, labels=boxplot.stats(sample_sums(ps_scaffolds_filtered))$stats, x=1.25)
Print the dimensions of the filtered data Number of genes found in samples after filtering
dim(data.frame(otu_table(ps_scaffolds_filtered)))
## [1] 417 43
Print the total sequence count after filtering Number of sequences in all samples
sum(data.frame(otu_table(ps_scaffolds_filtered)))
## [1] 138009
Calculate and print the percentage of sequences dropped from the original dataset
original_seq_count = sum(data.frame(otu_table(ps_amr_good)))
filtered_seq_count = sum(data.frame(otu_table(ps_scaffolds_filtered)))
seq_dropped_percentage = ((original_seq_count - filtered_seq_count) / original_seq_count) * 100
# Sequence % dropped from the dataset:
seq_dropped_percentage
## [1] 0.01376532
Show AMR gene distribution across samples
ps_scaffolds_filtered %>%
ps_mutate(Group = factor(sample_data(ps_scaffolds_filtered)$City)) %>%
tax_agg("ARG") %>%
ps_seriate(dist = "bray", method = "OLO_ward") %>%
comp_barplot(tax_level = "ARG",
sample_order = rownames(sample_data(ps_scaffolds_filtered)[order(sample_data(ps_scaffolds_filtered)$Sample), ]),
n_taxa = 10,
label = "Sample") +
theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "bottom")+
facet_grid(~Group, scales = "free", space = "free")
## Registered S3 method overwritten by 'seriation':
## method from
## reorder.hclust vegan
## Short values detected in phyloseq tax_table (nchar<4) :
## Consider using tax_fix() to make taxa uniquely identifiable
## Short values detected in phyloseq tax_table (nchar<4) :
## Consider using tax_fix() to make taxa uniquely identifiable
Using CARD DB we take reconstructed genes to assign for witch AMR group, AMR familie and AMR category they belong.
source("~/ARG_waste_water/src/comperative_metagen_definitions.r")
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append,
## as.data.frame, basename, cbind, colnames, dirname, do.call,
## duplicated, eval, evalq, get, grep, grepl, intersect, is.unsorted,
## lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin,
## pmin.int, rank, rbind, rownames, sapply, saveRDS, setdiff, table,
## tapply, union, unique, unsplit, which.max, which.min
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## I, expand.grid, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## The following object is masked from 'package:phyloseq':
##
## distance
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Warning: multiple methods tables found for 'sort'
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## The following object is masked from 'package:phyloseq':
##
## sampleNames
## Warning: multiple methods tables found for 'sort'
## Loading required package: zCompositions
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:patchwork':
##
## area
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: NADA
## Loading required package: survival
##
## Attaching package: 'NADA'
## The following object is masked from 'package:IRanges':
##
## cor
## The following object is masked from 'package:S4Vectors':
##
## cor
## The following object is masked from 'package:stats':
##
## cor
## Loading required package: truncnorm
## Loading required package: latticeExtra
##
## Attaching package: 'latticeExtra'
## The following object is masked from 'package:ggplot2':
##
## layer
##
## Attaching package: 'igraph'
## The following object is masked from 'package:GenomicRanges':
##
## union
## The following object is masked from 'package:IRanges':
##
## union
## The following object is masked from 'package:S4Vectors':
##
## union
## The following objects are masked from 'package:BiocGenerics':
##
## normalize, path, union
## The following object is masked from 'package:tidyr':
##
## crossing
## The following objects are masked from 'package:dplyr':
##
## as_data_frame, groups, union
## The following object is masked from 'package:vegan':
##
## diversity
## The following object is masked from 'package:permute':
##
## permute
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
##
## Attaching package: 'SpiecEasi'
## The following object is masked from 'package:igraph':
##
## make_graph
## The following object is masked from 'package:MASS':
##
## fitdistr
card_tax_t <- NA
card_tax_t <- process_card_data(arg_t)
#
# SEPERATE DRUG CLASSES INTO INDIVIDUAL NAMES
#
# Assuming tax_t is your data frame and DrugClass is the column of interest
# unique_drug_classes
unique_drug_classes <- card_tax_t %>%
# Separate the DrugClass column into rows by splitting on ";"
separate_rows(DrugClass, sep = ";") %>%
# Select unique DrugClass names
distinct(DrugClass) %>%
# Pull the DrugClass column to get a vector of unique drug class names
pull(DrugClass)
#unique_amr_families
unique_amr_families <- card_tax_t %>%
# Separate the DrugClass column into rows by splitting on ";"
separate_rows(AMRGeneFamily, sep = ";") %>%
# Select unique DrugClass names
distinct(AMRGeneFamily) %>%
# Pull the DrugClass column to get a vector of unique drug class names
pull(AMRGeneFamily)
#unique_amr_categories
unique_amr_categories <- card_tax_t %>%
# Separate the DrugClass column into rows by splitting on ";"
separate_rows(AntibioticCategory, sep = ";") %>%
# Select unique DrugClass names
distinct(AntibioticCategory) %>%
# Pull the DrugClass column to get a vector of unique drug class names
pull(AntibioticCategory)
Create Objects for AMR categories.
drug_class_counts <- create_drug_class_counts(arg_t, card_tax_t, unique_drug_classes)
# Create the AMR families counts
amr_families_counts <- create_amr_families_counts(arg_t, card_tax_t, unique_amr_families)
# Create the AMR categories counts
amr_categories_counts <- create_amr_categories_counts(arg_t, card_tax_t, unique_amr_categories)
# Create the taxonomy table
tax_t_drugs <- create_taxonomy_table(card_tax_t)
physeq_drug_class <- createPhyloseqObject(drug_class_counts, METADATA_PATH)
physeq_amr_families <- createPhyloseqObject(amr_families_counts, METADATA_PATH)
physeq_amr_categories <- createPhyloseqObject(amr_categories_counts, METADATA_PATH)
Show to what Drug classes resistance genes found in metagenome are resistant to.
physeq_amr_categories %>%
ps_mutate(Group = factor(sample_data(physeq_amr_categories)$City)) %>%
tax_agg("ARG") %>%
ps_seriate(dist = "bray", method = "OLO_ward") %>%
comp_barplot(tax_level = "ARG",
sample_order = rownames(metadata[order(metadata$Sample), ]),
n_taxa = 10,
label = "Sample", interactive = TRUE) +
theme(axis.text.x = element_blank(), axis.ticks.x = element_blank(), legend.position = "bottom")+
facet_grid(~Group, scales = "free", space = "free")+
labs(y = "Relative Abundance", fill = "AMR categories")
## Short values detected in phyloseq tax_table (nchar<4) :
## Consider using tax_fix() to make taxa uniquely identifiable
## Short values detected in phyloseq tax_table (nchar<4) :
## Consider using tax_fix() to make taxa uniquely identifiable
physeq_drug_class %>%
ps_mutate(Group = factor(sample_data(physeq_drug_class)$City)) %>%
tax_agg("ARG") %>%
ps_seriate(dist = "bray", method = "OLO_ward") %>%
comp_barplot(tax_level = "ARG",
sample_order = rownames(metadata[order(metadata$Sample), ]),
n_taxa = 10,
label = "Sample", interactive = TRUE) +
theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "bottom")+
facet_grid(~Group, scales = "free", space = "free")
## Warning in ps_melt(ps): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
physeq_amr_families %>%
ps_mutate(Group = factor(sample_data(physeq_amr_families)$City)) %>%
tax_agg("ARG") %>%
ps_seriate(dist = "bray", method = "OLO_ward") %>%
comp_barplot(tax_level = "ARG",
sample_order = rownames(metadata[order(metadata$Sample), ]),
n_taxa = 10,
label = "Sample", interactive = TRUE) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
legend.position = "bottom"
)+
guides(fill = guide_legend(ncol = 3))+
facet_grid(~Group, scales = "free", space = "free")
## Warning in ps_melt(ps): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
# Plot 1 (physeq_drug_class - renamed for clarity)
p1 <- physeq_drug_class %>%
ps_mutate(Group = factor(sample_data(physeq_drug_class)$City)) %>%
tax_agg("ARG") %>%
ps_seriate(dist = "bray", method = "OLO_ward") %>%
comp_barplot(tax_level = "ARG",
sample_order = rownames(metadata[order(metadata$Sample), ]),
n_taxa = 10,
label = "Sample", interactive = FALSE) + # Turn off interactive for combination
theme(
legend.position = "right",
axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_text(size = 14),
legend.text = element_text(size = 12),
legend.title = element_text(size = 14),
strip.text = element_text(size = 14),
) +
facet_grid(~Group, scales = "free", space = "free") +
labs(fill = "Abtibiotic Class")
## Warning in ps_melt(ps): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
# Plot 2 (physeq_amr_families - renamed for clarity)
p2 <- physeq_amr_families %>%
ps_mutate(Group = factor(sample_data(physeq_amr_families)$City)) %>%
tax_agg("ARG") %>%
ps_seriate(dist = "bray", method = "OLO_ward") %>%
comp_barplot(tax_level = "ARG",
sample_order = rownames(metadata[order(metadata$Sample), ]),
n_taxa = 10,
label = "Sample", interactive = FALSE) + # Turn off interactive
theme(
axis.text.x = element_text(angle = 90, hjust = 1, size = 12), # Smaller x-axis text
axis.ticks.x = element_blank(),
axis.text.y = element_text(size = 12),
strip.text = element_blank(),
legend.position = "right",
axis.title.x = element_blank(),
legend.text = element_text(size = 14),
legend.title = element_text(size = 12)
) + # Remove x-axis title
facet_grid(~Group, scales = "free", space = "free") +
ylab("Relative Abundance") + # Clear y-axis label
labs(fill = "AMR family")
## Warning in ps_melt(ps): The sample variables:
## Sample
## have been renamed to:
## sample_Sample
## to avoid conflicts with special phyloseq plot attribute names.
# Combine the plots vertically using patchwork
combined_plot <- p1 / p2
print(combined_plot)
Show gene distribution
GP1 = transform_sample_counts(ps_scaffolds_filtered, function(x) 1E6 * x/sum(x))
arg.sum = tapply(taxa_sums(GP1), tax_table(GP1)[, "ARG"], sum, na.rm=TRUE)
top10args = names(sort(arg.sum, TRUE))[1:10]
GP1 = prune_taxa((tax_table(GP1)[, "ARG"] %in% top10args), GP1)
GP.ord <- ordinate(GP1, "NMDS", "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.1459272
## Run 1 stress 0.1882397
## Run 2 stress 0.2179513
## Run 3 stress 0.145776
## ... New best solution
## ... Procrustes: rmse 0.01179639 max resid 0.07034102
## Run 4 stress 0.2288713
## Run 5 stress 0.1462751
## ... Procrustes: rmse 0.02390133 max resid 0.1133681
## Run 6 stress 0.1879321
## Run 7 stress 0.1468095
## Run 8 stress 0.1462751
## ... Procrustes: rmse 0.02390591 max resid 0.113414
## Run 9 stress 0.1457759
## ... New best solution
## ... Procrustes: rmse 8.189113e-05 max resid 0.0003796615
## ... Similar to previous best
## Run 10 stress 0.398611
## Run 11 stress 0.1457759
## ... Procrustes: rmse 3.808941e-06 max resid 1.802221e-05
## ... Similar to previous best
## Run 12 stress 0.2418597
## Run 13 stress 0.1468094
## Run 14 stress 0.1462791
## Run 15 stress 0.1979333
## Run 16 stress 0.1685059
## Run 17 stress 0.187932
## Run 18 stress 0.1814025
## Run 19 stress 0.2210688
## Run 20 stress 0.1809568
## *** Best solution repeated 2 times
GP.ord2 <- ordinate(ps_scaffolds_filtered, "NMDS", "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.137757
## Run 1 stress 0.1752451
## Run 2 stress 0.143728
## Run 3 stress 0.1434783
## Run 4 stress 0.1507417
## Run 5 stress 0.1691574
## Run 6 stress 0.1736201
## Run 7 stress 0.1820066
## Run 8 stress 0.1382029
## ... Procrustes: rmse 0.02338058 max resid 0.1427679
## Run 9 stress 0.1476598
## Run 10 stress 0.1381956
## ... Procrustes: rmse 0.02197587 max resid 0.1298196
## Run 11 stress 0.1736545
## Run 12 stress 0.1971622
## Run 13 stress 0.1381956
## ... Procrustes: rmse 0.02197309 max resid 0.1298094
## Run 14 stress 0.1381956
## ... Procrustes: rmse 0.02197544 max resid 0.1298202
## Run 15 stress 0.1483226
## Run 16 stress 0.1475183
## Run 17 stress 0.1487271
## Run 18 stress 0.1381956
## ... Procrustes: rmse 0.02197625 max resid 0.129823
## Run 19 stress 0.1381717
## ... Procrustes: rmse 0.02475446 max resid 0.1418515
## Run 20 stress 0.1475181
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 17: stress ratio > sratmax
## 3: scale factor of the gradient < sfgrmin
plot_ordination(GP1, GP.ord2, type="taxa", color = "ARG", title="ARGs") + theme(legend.position = "bottom")
Normalization by subsampling (rarefaction)
Display rarefication curves.
tab <- otu_table(ps_scaffolds_filtered)
class(tab) <- "matrix"
## Warning in class(tab) <- "matrix": Setting class(x) to "matrix" sets attribute
## to NULL; result will no longer be an S4 object
tab <- t(tab)
rarecurve(tab, step=10, lwd=2, ylab="OTU", label=F)
Rarefication if needed.
# find minimal deapth to rarefy to
df=as.data.frame(sample_sums(ps_scaffolds_filtered))
head(df[order( df[,1] ),],1)
## [1] 820
ps_amr_rare = rarefy_even_depth(ps_scaffolds_filtered, sample.size=820, replace=FALSE, rngseed=123, verbose=FALSE)
Normalization by cumulative sum scaling (CSS)
ps_amr_CSS = microbiomeMarker::normalize(ps_scaffolds_filtered, method="CSS")
## Registered S3 methods overwritten by 'proxy':
## method from
## print.registry_field registry
## print.registry_entry registry
## Registered S3 method overwritten by 'gplots':
## method from
## reorder.factor DescTools
## Default value being used.
Show Top 15 most abundant genes and their relative xount
abu_table <- as.data.frame(otu_table(ps_scaffolds_filtered))
tax_table <- as.data.frame(tax_table(ps_scaffolds_filtered))
rownames(abu_table) <- tax_table$ARG
total_counts <- rowSums(abu_table)
top_organisms <- sort(total_counts, decreasing = TRUE)
top_organisms <- head(top_organisms, n = 15)
top_organisms
## tet(Q)
## 15101
## ErmB
## 6115
## msrE
## 5585
## Escherichia coli EF-Tu mutants conferring resistance to Pulvomycin
## 5393
## sul1
## 4740
## CfxA6
## 4017
## tet(C)
## 3912
## ErmF
## 3676
## tet(O)
## 3569
## mphE
## 3329
## tet(W)
## 3265
## aadS
## 2773
## tet(W/N/W)
## 2674
## OXA-10
## 2483
## CfxA2
## 2451
# Calculate the percentage of the top 15 organisms
# Percentage of top 15 organisms:
top_organisms/sum(rowSums(abu_table))*100
## tet(Q)
## 10.942040
## ErmB
## 4.430870
## msrE
## 4.046838
## Escherichia coli EF-Tu mutants conferring resistance to Pulvomycin
## 3.907716
## sul1
## 3.434559
## CfxA6
## 2.910680
## tet(C)
## 2.834598
## ErmF
## 2.663594
## tet(O)
## 2.586063
## mphE
## 2.412162
## tet(W)
## 2.365788
## aadS
## 2.009289
## tet(W/N/W)
## 1.937555
## OXA-10
## 1.799158
## CfxA2
## 1.775971
Show Drug classes that these genes are affecting
abu_table <- as.data.frame(otu_table(physeq_drug_class))
tax_table <- as.data.frame(tax_table(physeq_drug_class))
rownames(abu_table) <- tax_table$ARG
total_counts <- rowSums(abu_table)
top_organisms <- sort(total_counts, decreasing = TRUE)
top_organisms <- head(top_organisms, n = 15)
top_organisms
## tetracycline antibiotic macrolide antibiotic
## 43210 28062
## penam streptogramin antibiotic
## 20576 17811
## aminoglycoside antibiotic lincosamide antibiotic
## 17632 14842
## cephalosporin streptogramin B antibiotic
## 12319 10667
## streptogramin A antibiotic cephamycin
## 10648 10639
## fluoroquinolone antibiotic penem
## 7925 6481
## carbapenem sulfonamide antibiotic
## 6150 5981
## elfamycin antibiotic
## 5704
# Calculate the percentage of the top 15 organisms
print("Percentage of top 15 organisms:")
## [1] "Percentage of top 15 organisms:"
top_organisms/sum(rowSums(abu_table))*100
## tetracycline antibiotic macrolide antibiotic
## 17.339904 11.261106
## penam streptogramin antibiotic
## 8.257021 7.147443
## aminoglycoside antibiotic lincosamide antibiotic
## 7.075612 5.956002
## cephalosporin streptogramin B antibiotic
## 4.943538 4.280601
## streptogramin A antibiotic cephamycin
## 4.272976 4.269364
## fluoroquinolone antibiotic penem
## 3.180253 2.600785
## carbapenem sulfonamide antibiotic
## 2.467957 2.400138
## elfamycin antibiotic
## 2.288980
Show most common amr_gene families.
abu_table <- as.data.frame(otu_table(physeq_amr_families))
tax_table <- as.data.frame(tax_table(physeq_amr_families))
rownames(abu_table) <- tax_table$ARG
total_counts <- rowSums(abu_table)
top_organisms <- sort(total_counts, decreasing = TRUE)
top_organisms <- head(top_organisms, n = 15)
top_organisms
## tetracycline-resistant ribosomal protection protein
## 31102
## major facilitator superfamily (MFS) antibiotic efflux pump
## 16371
## OXA beta-lactamase
## 12523
## Erm 23S ribosomal RNA methyltransferase
## 10667
## CfxA beta-lactamase
## 6959
## resistance-nodulation-cell division (RND) antibiotic efflux pump
## 6698
## ANT(3_doubleprime)
## 6676
## msr-type ABC-F protein
## 6365
## sulfonamide resistant sul
## 5981
## elfamycin resistant EF-Tu
## 5704
## macrolide phosphotransferase (MPH)
## 4641
## ANT(6)
## 3203
## OXA-10-like beta-lactamase
## 2799
## APH(3_doubleprime)
## 2281
## lincosamide nucleotidyltransferase (LNU)
## 1877
# Calculate the percentage of the top 15 organisms
print("Percentage of top 15 organisms:")
## [1] "Percentage of top 15 organisms:"
top_organisms/sum(rowSums(abu_table))*100
## tetracycline-resistant ribosomal protection protein
## 19.990102
## major facilitator superfamily (MFS) antibiotic efflux pump
## 10.522087
## OXA beta-lactamase
## 8.048873
## Erm 23S ribosomal RNA methyltransferase
## 6.855971
## CfxA beta-lactamase
## 4.472739
## resistance-nodulation-cell division (RND) antibiotic efflux pump
## 4.304987
## ANT(3_doubleprime)
## 4.290847
## msr-type ABC-F protein
## 4.090959
## sulfonamide resistant sul
## 3.844152
## elfamycin resistant EF-Tu
## 3.666116
## macrolide phosphotransferase (MPH)
## 2.982897
## ANT(6)
## 2.058655
## OXA-10-like beta-lactamase
## 1.798993
## APH(3_doubleprime)
## 1.466061
## lincosamide nucleotidyltransferase (LNU)
## 1.206399
Show most commonly affected drug classes.
abu_table <- as.data.frame(otu_table(physeq_amr_categories))
tax_table <- as.data.frame(tax_table(physeq_amr_categories))
rownames(abu_table) <- tax_table$ARG
total_counts <- rowSums(abu_table)
top_organisms <- sort(total_counts, decreasing = TRUE)
top_organisms <- head(top_organisms, n = 15)
top_organisms
## tetracycline minocycline chlortetracycline oxytetracycline
## 42841 32671 32638 30186
## doxycycline demeclocycline erythromycin streptomycin
## 29868 29868 26351 14770
## lincomycin oxacillin NA azithromycin
## 14681 14657 14406 13482
## telithromycin clarithromycin roxithromycin
## 12037 11835 11748
# Calculate the percentage of the top 15 organisms
print("Percentage of top 15 organisms:")
## [1] "Percentage of top 15 organisms:"
top_organisms/sum(rowSums(abu_table))*100
## tetracycline minocycline chlortetracycline oxytetracycline
## 6.307707 4.810324 4.805465 4.444444
## doxycycline demeclocycline erythromycin streptomycin
## 4.397624 4.397624 3.879797 2.174665
## lincomycin oxacillin NA azithromycin
## 2.161561 2.158028 2.121072 1.985026
## telithromycin clarithromycin roxithromycin
## 1.772271 1.742530 1.729720
Exploring differences how AMR gene composition is affected by Municipality factor.
alph_city <- plot_richness(ps_scaffolds_filtered, x="City", title="AMR diversity", measures=c("InvSimpson", "Observed", "Shannon", "Chao1"), nrow = 2, sortby = "Shannon") +
geom_boxplot() +
theme_bw() +
theme(legend.position = "none", axis.text.x = element_text(angle = 90, hjust = 1))+
theme(
legend.position = "right",
axis.text.x = element_text(angle = 90, hjust = 1, size = 14),
axis.text.y = element_text(size = 14),
axis.title = element_text(size = 14),
legend.text = element_text(size = 19),
legend.title = element_text(size = 18)
)
alph_city
Calculate distance
# Calculate diversity indexes
alpha_indexes_amr <- estimate_richness(ps_scaffolds_filtered, split = TRUE, c("Shannon", "Simpson", "InvSimpson"))
kruskal.test(alpha_indexes_amr$Shannon ~ sample_data(ps_scaffolds_filtered)$City)
##
## Kruskal-Wallis rank sum test
##
## data: alpha_indexes_amr$Shannon by sample_data(ps_scaffolds_filtered)$City
## Kruskal-Wallis chi-squared = 28.415, df = 14, p-value = 0.01253
kruskal.test(alpha_indexes_amr$InvSimpson ~ sample_data(ps_scaffolds_filtered)$City)
##
## Kruskal-Wallis rank sum test
##
## data: alpha_indexes_amr$InvSimpson by sample_data(ps_scaffolds_filtered)$City
## Kruskal-Wallis chi-squared = 19.456, df = 14, p-value = 0.1483
Kruskal-Wallis rank sum test show significant differences in overall AMR diversity (Shannon) but dont show significant differences in how specific AMR dominate between cities (InvSimpson).
Lets show minimal and maximux diversity values.
min(alpha_indexes_amr$InvSimpson)
## [1] 16.38333
max(alpha_indexes_amr$InvSimpson)
## [1] 38.66515
min(alpha_indexes_amr$Shannon)
## [1] 3.296257
max(alpha_indexes_amr$Shannon)
## [1] 4.262206
Dunes Test
pinpoint which specific means are significantlt different from the others
library(dunn.test)
dunn_results <- dunn.test(alpha_indexes_amr$Shannon,
sample_data(ps_scaffolds_filtered)$City,
method="bh")
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 28.4154, df = 14, p-value = 0.01
##
##
## Comparison of x by group
## (Benjamini-Hochberg)
## Col Mean-|
## Row Mean | Cesis Dobele Jelgava Jurmala Kuldiga Liepāja
## ---------+------------------------------------------------------------------
## Dobele | -0.845332
## | 0.3425
## |
## Jelgava | 0.000000 0.845332
## | 0.5000 0.3482
## |
## Jurmala | -1.528101 -0.682768 -1.528101
## | 0.1897 0.3711 0.1953
## |
## Kuldiga | -1.658153 -0.812820 -1.658153 -0.130051
## | 0.1892 0.3469 0.1964 0.4754
## |
## Liepāja | -0.487692 0.357640 -0.487692 1.040409 1.170460
## | 0.4212 0.4504 0.4267 0.2795 0.2760
## |
## Madona | -0.162564 0.682768 -0.162564 1.365537 1.495589 0.325128
## | 0.4665 0.3764 0.4713 0.2317 0.1965 0.4548
## |
## Salaspil | -0.218102 0.537986 -0.218102 1.148673 1.264994 0.218102
## | 0.4671 0.4080 0.4721 0.2800 0.2514 0.4773
## |
## Saldus | -0.195076 0.650256 -0.195076 1.333024 1.463076 0.292615
## | 0.4672 0.3812 0.4721 0.2396 0.2035 0.4593
## |
## Sigulda | 1.593127 2.438460 1.593127 3.121229 3.251280 2.080819
## | 0.1823 0.0774 0.1882 0.0236* 0.0302* 0.1229
## |
## Smiltene | 1.105435 1.950768 1.105435 2.633537 2.763588 1.593127
## | 0.2716 0.1490 0.2769 0.0634 0.0600 0.1945
## |
## Talsi | -1.134133 -0.378044 -1.134133 0.232642 0.348964 -0.697928
## | 0.2696 0.4462 0.2751 0.4760 0.4491 0.3746
## |
## Tukums | -0.455179 0.390153 -0.455179 1.072922 1.202973 0.032512
## | 0.4206 0.4459 0.4259 0.2806 0.2672 0.4965
## |
## Valmiera | 1.690665 2.535998 1.690665 3.218767 3.348818 2.178357
## | 0.1909 0.0654 0.1988 0.0225* 0.0426* 0.1285
## |
## Ventspil | 0.552717 1.398050 0.552717 2.080819 2.210870 1.040409
## | 0.4063 0.2240 0.4118 0.1311 0.1291 0.2846
## Col Mean-|
## Row Mean | Madona Salaspil Saldus Sigulda Smiltene Talsi
## ---------+------------------------------------------------------------------
## Salaspil | -0.072700
## | 0.4897
## |
## Saldus | -0.032512 0.043620
## | 0.4917 0.4968
## |
## Sigulda | 1.755691 1.643039 1.788204
## | 0.1889 0.1882 0.1844
## |
## Smiltene | 1.267999 1.206833 1.300512 -0.487692
## | 0.2560 0.2714 0.2477 0.4159
## |
## Talsi | -0.988731 -0.836217 -0.959651 -2.559069 -2.122864
## | 0.2922 0.3413 0.3001 0.0689 0.1266
## |
## Tukums | -0.292615 -0.189022 -0.260102 -2.048306 -1.560614 0.727008
## | 0.4645 0.4649 0.4688 0.1252 0.1887 0.3774
## |
## Valmiera | 1.853229 1.730280 1.885742 0.097538 0.585230 2.646310
## | 0.1676 0.1908 0.1639 0.4842 0.4072 0.0712
## |
## Ventspil | 0.715281 0.712468 0.747794 -1.040409 -0.552717 1.628498
## | 0.3774 0.3731 0.3729 0.2899 0.4175 0.1872
## Col Mean-|
## Row Mean | Tukums Valmiera
## ---------+----------------------
## Valmiera | 2.145845
## | 0.1288
## |
## Ventspil | 1.007896 -1.137948
## | 0.2888 0.2791
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
dunn_results$P.adjusted[dunn_results$P.adjusted < 0.05]
## [1] 0.02363783 0.03015770 0.02252998 0.04260734
dunn_results$chi2
## [1] 28.41543
dunn_results$Z[dunn_results$P.adjusted < 0.05]
## [1] 3.121229 3.251280 3.218768 3.348819
Shows most significantly different pairs between cities.
dunn_results$comparisons[dunn_results$P.adjusted < 0.05]
## [1] "Jurmala - Sigulda" "Kuldiga - Sigulda" "Jurmala - Valmiera"
## [4] "Kuldiga - Valmiera"
Lets compare how simmilar municipalities are between each other using bray-curtis distance on a NMDS plot.
GP.ord2 <- ordinate(ps_amr_CSS, "NMDS", "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.137757
## Run 1 stress 0.2005092
## Run 2 stress 0.1722958
## Run 3 stress 0.174337
## Run 4 stress 0.153554
## Run 5 stress 0.1760838
## Run 6 stress 0.1718425
## Run 7 stress 0.138203
## ... Procrustes: rmse 0.02336348 max resid 0.1426252
## Run 8 stress 0.1720912
## Run 9 stress 0.1434781
## Run 10 stress 0.175746
## Run 11 stress 0.1763512
## Run 12 stress 0.1381716
## ... Procrustes: rmse 0.02475104 max resid 0.1419284
## Run 13 stress 0.1700024
## Run 14 stress 0.1382146
## ... Procrustes: rmse 0.02482664 max resid 0.141987
## Run 15 stress 0.1751137
## Run 16 stress 0.137757
## ... Procrustes: rmse 1.875946e-05 max resid 7.134341e-05
## ... Similar to previous best
## Run 17 stress 0.138203
## ... Procrustes: rmse 0.02336629 max resid 0.1426453
## Run 18 stress 0.1381716
## ... Procrustes: rmse 0.02475398 max resid 0.141942
## Run 19 stress 0.143372
## Run 20 stress 0.1767373
## *** Best solution repeated 1 times
p1 = plot_ordination(ps_amr_CSS, GP.ord2, type="samples", color = "City", title="City bray-distance NMDS")+
geom_polygon(aes(fill=City), alpha = 1/2) + geom_point(size=3)
print(p1)
Calculate distances
tax_bray_amr <- phyloseq::distance(ps_amr_CSS, method = "bray")
Lets check variance with ANOVA.
anosim(tax_bray_amr, sample_data(ps_amr_CSS)$City, distance = "bray", permutations = 999)
##
## Call:
## anosim(x = tax_bray_amr, grouping = sample_data(ps_amr_CSS)$City, permutations = 999, distance = "bray")
## Dissimilarity: bray
##
## ANOSIM statistic R: 0.5901
## Significance: 0.001
##
## Permutation: free
## Number of permutations: 999
PERMANOVA
adonis2_rez<-adonis2(tax_bray_amr ~ sample_data(ps_amr_CSS)$City)
print(adonis2_rez)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = tax_bray_amr ~ sample_data(ps_amr_CSS)$City)
## Df SumOfSqs R2 F Pr(>F)
## Model 14 3.0982 0.65871 3.8601 0.001 ***
## Residual 28 1.6052 0.34129
## Total 42 4.7034 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Chcek beta dispersion between City
beta <- betadisper(tax_bray_amr, sample_data(ps_amr_CSS)$City)
print(anova(beta))
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 14 0.078542 0.0056101 0.941 0.5309
## Residuals 28 0.166939 0.0059621
plot(beta)
### Population Size
Now look wether population size impact AMR.
kruskal.test(alpha_indexes_amr$Shannon ~ sample_data(ps_scaffolds_filtered)$Population)
##
## Kruskal-Wallis rank sum test
##
## data: alpha_indexes_amr$Shannon by sample_data(ps_scaffolds_filtered)$Population
## Kruskal-Wallis chi-squared = 7.0158, df = 3, p-value = 0.07139
kruskal.test(alpha_indexes_amr$InvSimpson ~ sample_data(ps_scaffolds_filtered)$Population)
##
## Kruskal-Wallis rank sum test
##
## data: alpha_indexes_amr$InvSimpson by sample_data(ps_scaffolds_filtered)$Population
## Kruskal-Wallis chi-squared = 3.9851, df = 3, p-value = 0.2631
plot_richness(ps_scaffolds_filtered, x="Population", title="AMR diversity", measures=c("InvSimpson", "Shannon"), nrow = 2, sortby = "InvSimpson") +
geom_boxplot() +
theme_bw() +
theme(legend.position = "none", axis.text.x = element_text(angle = 90, hjust = 1))+
theme(
legend.position = "right",
axis.text.x = element_text(angle = 90, hjust = 1, size = 14),
axis.text.y = element_text(size = 14),
axis.title = element_text(size = 14),
legend.text = element_text(size = 14),
legend.title = element_text(size = 14)
)
beta_popul <- ps_scaffolds_filtered %>%
dist_calc(dist = "bray", binary = TRUE) %>%
ord_calc("NMDS") %>%
ord_plot(
color = "Population",
shape = "City",
size = 5,
alpha = 0.8
) +
scale_shape_manual(values = c(16, 17, 18, 15, 19, 25, 8, 13, 1, 2, 3, 4, 5, 6, 7)) + # Manual shapes for all hospital types
stat_ellipse(aes(linetype = Population, colour = Population)) +
theme_bw() +
scale_color_brewer(palette = "Set1") +
scale_fill_brewer(palette = "Set1") +
labs(
title = "NMDS of Population Profiles",
x = "NMDS1",
y = "NMDS2"
)+
theme(
legend.position = "right",
axis.text.x = element_text(angle = 90, hjust = 1, size = 14),
axis.text.y = element_text(size = 14),
axis.title = element_text(size = 14),
legend.text = element_text(size = 14),
legend.title = element_text(size = 16)
)
## Run 0 stress 0.1476541
## Run 1 stress 0.1608673
## Run 2 stress 0.1535323
## Run 3 stress 0.1791487
## Run 4 stress 0.183082
## Run 5 stress 0.1892775
## Run 6 stress 0.163148
## Run 7 stress 0.1719986
## Run 8 stress 0.1844325
## Run 9 stress 0.1584246
## Run 10 stress 0.1719985
## Run 11 stress 0.1835751
## Run 12 stress 0.1615521
## Run 13 stress 0.1786126
## Run 14 stress 0.1477985
## ... Procrustes: rmse 0.005259716 max resid 0.02982373
## Run 15 stress 0.1535323
## Run 16 stress 0.178458
## Run 17 stress 0.1847899
## Run 18 stress 0.1747663
## Run 19 stress 0.1618977
## Run 20 stress 0.1765684
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 20: stress ratio > sratmax
beta_popul
adonis2_rez<-adonis2(tax_bray_amr ~ sample_data(ps_amr_CSS)$Population)
print(adonis2_rez)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = tax_bray_amr ~ sample_data(ps_amr_CSS)$Population)
## Df SumOfSqs R2 F Pr(>F)
## Model 3 0.6456 0.13727 2.0684 0.014 *
## Residual 39 4.0578 0.86273
## Total 42 4.7034 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
beta <- betadisper(tax_bray_amr, sample_data(ps_amr_CSS)$Population)
print("BETADISPER")
## [1] "BETADISPER"
print("Population")
## [1] "Population"
print(anova(beta))
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 3 0.05728 0.019092 1.8029 0.1626
## Residuals 39 0.41300 0.010590
plot(beta)
No significance in Regional Hospital.
wilcox.test(alpha_indexes_amr$Shannon~sample_data(ps_scaffolds_filtered)$Regional_Hospital,
p.adjust.method = "BH")
##
## Wilcoxon rank sum exact test
##
## data: alpha_indexes_amr$Shannon by sample_data(ps_scaffolds_filtered)$Regional_Hospital
## W = 207, p-value = 0.6196
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(alpha_indexes_amr$InvSimpson~sample_data(ps_scaffolds_filtered)$Regional_Hospital,
p.adjust.method = "BH")
##
## Wilcoxon rank sum exact test
##
## data: alpha_indexes_amr$InvSimpson by sample_data(ps_scaffolds_filtered)$Regional_Hospital
## W = 246, p-value = 0.6718
## alternative hypothesis: true location shift is not equal to 0
Hospital Types
kruskal.test(alpha_indexes_amr$Shannon ~ sample_data(ps_scaffolds_filtered)$Hospital_Type)
##
## Kruskal-Wallis rank sum test
##
## data: alpha_indexes_amr$Shannon by sample_data(ps_scaffolds_filtered)$Hospital_Type
## Kruskal-Wallis chi-squared = 18.538, df = 7, p-value = 0.009763
kruskal.test(alpha_indexes_amr$InvSimpson ~ sample_data(ps_scaffolds_filtered)$Hospital_Type)
##
## Kruskal-Wallis rank sum test
##
## data: alpha_indexes_amr$InvSimpson by sample_data(ps_scaffolds_filtered)$Hospital_Type
## Kruskal-Wallis chi-squared = 12.451, df = 7, p-value = 0.08666
GP.ord2 <- ordinate(ps_amr_CSS, "NMDS", "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.137757
## Run 1 stress 0.1381956
## ... Procrustes: rmse 0.02197242 max resid 0.1298134
## Run 2 stress 0.1377036
## ... New best solution
## ... Procrustes: rmse 0.004229607 max resid 0.02448258
## Run 3 stress 0.1475182
## Run 4 stress 0.1554664
## Run 5 stress 0.1784969
## Run 6 stress 0.1783075
## Run 7 stress 0.1812036
## Run 8 stress 0.179745
## Run 9 stress 0.1685441
## Run 10 stress 0.1784377
## Run 11 stress 0.1382029
## ... Procrustes: rmse 0.02403351 max resid 0.1433942
## Run 12 stress 0.1791362
## Run 13 stress 0.1381719
## ... Procrustes: rmse 0.02530655 max resid 0.1425349
## Run 14 stress 0.1760179
## Run 15 stress 0.1803947
## Run 16 stress 0.1377036
## ... Procrustes: rmse 1.172938e-05 max resid 6.288059e-05
## ... Similar to previous best
## Run 17 stress 0.143372
## Run 18 stress 0.1755806
## Run 19 stress 0.1382029
## ... Procrustes: rmse 0.02403364 max resid 0.1434358
## Run 20 stress 0.1434781
## *** Best solution repeated 1 times
p1 = plot_ordination(ps_amr_CSS, GP.ord2, type="samples", color = "Hospital_Type", title="Hospital_Type bray-distance NMDS")+
stat_ellipse(aes(linetype = Regional_Hospital, colour = Regional_Hospital)) + geom_point(size=3)
print(p1)
adonis2_rez<-adonis2(tax_bray_amr ~ sample_data(ps_amr_CSS)$Hospital_Type)
print(adonis2_rez)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = tax_bray_amr ~ sample_data(ps_amr_CSS)$Hospital_Type)
## Df SumOfSqs R2 F Pr(>F)
## Model 7 1.7110 0.36377 2.8588 0.001 ***
## Residual 35 2.9925 0.63623
## Total 42 4.7034 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Check BETADISPER between Hospital_Types
beta <- betadisper(tax_bray_amr, sample_data(ps_amr_CSS)$Hospital_Type)
print(anova(beta))
## Analysis of Variance Table
##
## Response: Distances
## Df Sum Sq Mean Sq F value Pr(>F)
## Groups 7 0.18447 0.0263526 3.5618 0.005392 **
## Residuals 35 0.25895 0.0073986
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(beta)
dunn_results <- dunn.test(alpha_indexes_amr$Shannon,
sample_data(ps_scaffolds_filtered)$Hospital_Type,
method="bh")
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 18.5385, df = 7, p-value = 0.01
##
##
## Comparison of x by group
## (Benjamini-Hochberg)
## Col Mean-|
## Row Mean | 0 2 3 4 Branch Other
## ---------+------------------------------------------------------------------
## 2 | -0.189022
## | 0.4408
## |
## 3 | -0.645351 -0.495291
## | 0.3301 0.3776
## |
## 4 | 0.729910 1.130959 2.693740
## | 0.3258 0.2581 0.0495
## |
## Branch | -0.836217 -0.727008 -0.465499 -1.824776
## | 0.3319 0.3115 0.3593 0.1361
## |
## Other | 0.043620 0.260102 0.831082 -0.801953 0.959651
## | 0.4826 0.4280 0.3157 0.3114 0.3147
## |
## Red_cros | 1.206833 1.560614 2.510036 0.843079 2.122864 1.300512
## | 0.2450 0.1661 0.0423 0.3493 0.0945 0.2257
## |
## Specilis | 1.643039 2.048306 3.139643 1.459966 2.559069 1.788204
## | 0.1561 0.0946 0.0237* 0.1837 0.0490 0.1291
## Col Mean-|
## Row Mean | Red_cros
## ---------+-----------
## Specilis | 0.487692
## | 0.3650
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
dunn_results$P.adjusted[dunn_results$P.adjusted < 0.05]
## [1] 0.04945864 0.04225157 0.02368146 0.04897794
dunn_results$chi2
## [1] 18.53848
dunn_results$Z[dunn_results$P.adjusted < 0.05]
## [1] 2.693740 2.510036 3.139644 2.559070
dunn_results$comparisons[dunn_results$P.adjusted < 0.05]
## [1] "3 - 4" "3 - Red_cross" "3 - Specilised"
## [4] "Branch - Specilised"
dunn_results <- dunn.test(alpha_indexes_amr$InvSimpson,
sample_data(ps_scaffolds_filtered)$Hospital_Type,
method="bh")
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 12.4514, df = 7, p-value = 0.09
##
##
## Comparison of x by group
## (Benjamini-Hochberg)
## Col Mean-|
## Row Mean | 0 2 3 4 Branch Other
## ---------+------------------------------------------------------------------
## 2 | -0.305343
## | 0.4627
## |
## 3 | -1.005055 -0.755529
## | 0.3391 0.3937
## |
## 4 | 0.104272 0.555198 2.159104
## | 0.4755 0.4265 0.1439
## |
## Branch | -0.995497 -0.785169 -0.317385 -1.407684
## | 0.3195 0.4035 0.4779 0.3184
## |
## Other | -0.712468 -0.455179 0.167895 -1.130959 0.378044
## | 0.3704 0.4543 0.4667 0.3285 0.4703
## |
## Red_cros | -0.043620 0.292615 1.133293 -0.185066 1.046892 0.747794
## | 0.4826 0.4491 0.3599 0.4778 0.3443 0.3744
## |
## Specilis | 1.206833 1.690665 2.938169 1.583343 2.297346 2.145845
## | 0.3539 0.2545 0.0462 0.2645 0.1512 0.1116
## Col Mean-|
## Row Mean | Red_cros
## ---------+-----------
## Specilis | 1.398050
## | 0.2837
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
dunn_results$P.adjusted[dunn_results$P.adjusted < 0.05]
## [1] 0.0462219
dunn_results$chi2
## [1] 12.45137
dunn_results$Z[dunn_results$P.adjusted < 0.05]
## [1] 2.93817
dunn_results$comparisons[dunn_results$P.adjusted < 0.05]
## [1] "3 - Specilised"
Using siamcat we look for genes that are explaining most of the variations by factor groups.
library(SIAMCAT)
## Loading required package: mlr3
## Registered S3 methods overwritten by 'pROC':
## method from
## print.roc huge
## plot.roc huge
relab_ps = transform_sample_counts(ps_scaffolds_filtered, function(x) x/sum(x))
psglom = tax_glom(relab_ps, "ARG")
create_siamcat_plot <- function(psg, label_column, case_value, output_prefix) {
# Create label
sc_label <- create.label(meta=sample_data(psg), label=label_column, case=case_value)
# Create SIAMCAT object
siamcat_o <- siamcat(phyloseq=psg, label=sc_label)
# Filter features
siamcat_o <- filter.features(siamcat_o, filter.method='abundance', cutoff=1e-03)
siamcat_o <- filter.features(siamcat_o, filter.method='prevalence', cutoff=0.05, feature.type='filtered')
# Check associations
siamcat_o <- check.associations(siamcat_o)
association.plot(siamcat_o, panels=c("fc", "prevalence"), prompt = FALSE, verbose = 0)
# Final association plot
svg(paste0("../plots/",output_prefix, "_final_association.svg"), width=12, height=7)
association.plot(siamcat_o, panels=c("fc", "prevalence"), prompt = FALSE, verbose = 0)
dev.off()
cat("plot saved to", paste0(output_prefix, "_final_association.svg"), "\n")
return(siamcat_o)
}
Here most differentially found genes are plotted
label_case_pairs <- list(
list(label = "Hospital_Type", case = "0"),
list(label = "Hospital_Type", case = "2"),
list(label = "Hospital_Type", case = "3"),
list(label = "Hospital_Type", case = "4"),
list(label = "Hospital_Type", case = "Specilised"),
list(label = "Hospital_Type", case = "Other"),
list(label = "Hospital_Type", case = "Red_cross"),
list(label = "Hospital_Type", case = "Branch")
# Add more pairs as needed
)
#psglom = tax_glom(relab_ps, "ARG")
# Loop through the pairs
for (pair in label_case_pairs) {
label_column <- pair$label
case_value <- pair$case
# Create a unique output prefix for each pair
output_prefix <- paste0("siamcat_", label_column, "_", case_value)
# Run the function
siamcat_result <- create_siamcat_plot(psglom, label_column, case_value, output_prefix)
# You can do additional processing with siamcat_result here if needed
volcano.plot(siamcat_result)
print(rownames(associations(siamcat_result))[associations(siamcat_result)$p.adj<0.05])
# Print a message to indicate progress
cat("Completed processing for", label_column, "with case", case_value, "\n")
}
## Label used as case:
## 0
## Label used as control:
## rest
## + finished create.label.from.metadata in 0.011 s
## + starting validate.data
## +++ checking overlap between labels and features
## + Keeping labels of 43 sample(s).
## +++ checking sample number per class
## Data set has a limited number of training examples:
## rest 41
## 0 2
## Note that a dataset this small/skewed is not necessarily suitable for analysis in this pipeline.
## +++ checking overlap between samples and metadata
## + finished validate.data in 0.05 s
## Features successfully filtered
## Features successfully filtered
## Less than 5 associations found. Consider changing your alpha value.
## Less than 5 associations found. Consider changing your alpha value.
## plot saved to siamcat_Hospital_Type_0_final_association.svg
## Fewer significant features at alpha 0.05 than desired features for annotation (5)!
## [1] "OXA-140"
## Completed processing for Hospital_Type with case 0
## Label used as case:
## 2
## Label used as control:
## rest
## + finished create.label.from.metadata in 0.004 s
## + starting validate.data
## +++ checking overlap between labels and features
## + Keeping labels of 43 sample(s).
## +++ checking sample number per class
## Data set has a limited number of training examples:
## rest 40
## 2 3
## Note that a dataset this small/skewed is not necessarily suitable for analysis in this pipeline.
## +++ checking overlap between samples and metadata
## + finished validate.data in 0.041 s
## Features successfully filtered
## Features successfully filtered
## Warning in ci.auc.roc(roc, ...): ci.auc() of a ROC curve with AUC == 1 is
## always 1-1 and can be misleading.
## plot saved to siamcat_Hospital_Type_2_final_association.svg
## [1] "oqxB" "Klebsiella pneumoniae KpnF"
## [3] "APH(3)-IIIa" "SAT-4"
## [5] "FOX-5" "LCR-1"
## [7] "ramA" "QnrD1"
## Completed processing for Hospital_Type with case 2
## Label used as case:
## 3
## Label used as control:
## rest
## + finished create.label.from.metadata in 0.009 s
## + starting validate.data
## +++ checking overlap between labels and features
## + Keeping labels of 43 sample(s).
## +++ checking sample number per class
## +++ checking overlap between samples and metadata
## + finished validate.data in 0.033 s
## Features successfully filtered
## Features successfully filtered
## Less than 5 associations found. Consider changing your alpha value.
## Less than 5 associations found. Consider changing your alpha value.
## plot saved to siamcat_Hospital_Type_3_final_association.svg
## Fewer significant features at alpha 0.05 than desired features for annotation (5)!
## [1] "Acinetobacter baumannii parC conferring resistance to fluoroquinolones"
## Completed processing for Hospital_Type with case 3
## Label used as case:
## 4
## Label used as control:
## rest
## + finished create.label.from.metadata in 0.003 s
## + starting validate.data
## +++ checking overlap between labels and features
## + Keeping labels of 43 sample(s).
## +++ checking sample number per class
## +++ checking overlap between samples and metadata
## + finished validate.data in 0.039 s
## Features successfully filtered
## Features successfully filtered
## No significant associations found. No plot will be produced.
##
## No significant associations found. No plot will be produced.
## plot saved to siamcat_Hospital_Type_4_final_association.svg
## character(0)
## Completed processing for Hospital_Type with case 4
## Label used as case:
## Specilised
## Label used as control:
## rest
## + finished create.label.from.metadata in 0.004 s
## + starting validate.data
## +++ checking overlap between labels and features
## + Keeping labels of 43 sample(s).
## +++ checking sample number per class
## Data set has a limited number of training examples:
## rest 40
## Specilised 3
## Note that a dataset this small/skewed is not necessarily suitable for analysis in this pipeline.
## +++ checking overlap between samples and metadata
## + finished validate.data in 0.04 s
## Features successfully filtered
## Features successfully filtered
## Less than 5 associations found. Consider changing your alpha value.
## Less than 5 associations found. Consider changing your alpha value.
## plot saved to siamcat_Hospital_Type_Specilised_final_association.svg
## Fewer significant features at alpha 0.05 than desired features for annotation (5)!
## [1] "IMP-13"
## Completed processing for Hospital_Type with case Specilised
## Label used as case:
## Other
## Label used as control:
## rest
## + finished create.label.from.metadata in 0.003 s
## + starting validate.data
## +++ checking overlap between labels and features
## + Keeping labels of 43 sample(s).
## +++ checking sample number per class
## Data set has a limited number of training examples:
## rest 40
## Other 3
## Note that a dataset this small/skewed is not necessarily suitable for analysis in this pipeline.
## +++ checking overlap between samples and metadata
## + finished validate.data in 0.036 s
## Features successfully filtered
## Features successfully filtered
## Less than 5 associations found. Consider changing your alpha value.
## Less than 5 associations found. Consider changing your alpha value.
## plot saved to siamcat_Hospital_Type_Other_final_association.svg
## Fewer significant features at alpha 0.05 than desired features for annotation (5)!
## [1] "aadA15"
## Completed processing for Hospital_Type with case Other
## Label used as case:
## Red_cross
## Label used as control:
## rest
## + finished create.label.from.metadata in 0.008 s
## + starting validate.data
## +++ checking overlap between labels and features
## + Keeping labels of 43 sample(s).
## +++ checking sample number per class
## Data set has a limited number of training examples:
## rest 40
## Red.cross 3
## Note that a dataset this small/skewed is not necessarily suitable for analysis in this pipeline.
## +++ checking overlap between samples and metadata
## + finished validate.data in 0.049 s
## Features successfully filtered
## Features successfully filtered
## Less than 5 associations found. Consider changing your alpha value.
## Less than 5 associations found. Consider changing your alpha value.
## plot saved to siamcat_Hospital_Type_Red_cross_final_association.svg
## Fewer significant features at alpha 0.05 than desired features for annotation (5)!
## [1] "tet(B)" "tetR"
## Completed processing for Hospital_Type with case Red_cross
## Label used as case:
## Branch
## Label used as control:
## rest
## + finished create.label.from.metadata in 0.003 s
## + starting validate.data
## +++ checking overlap between labels and features
## + Keeping labels of 43 sample(s).
## +++ checking sample number per class
## Data set has a limited number of training examples:
## rest 41
## Branch 2
## Note that a dataset this small/skewed is not necessarily suitable for analysis in this pipeline.
## +++ checking overlap between samples and metadata
## + finished validate.data in 0.035 s
## Features successfully filtered
## Features successfully filtered
## Less than 5 associations found. Consider changing your alpha value.
## Less than 5 associations found. Consider changing your alpha value.
## plot saved to siamcat_Hospital_Type_Branch_final_association.svg
## Fewer significant features at alpha 0.05 than desired features for annotation (5)!
## [1] "FOX-2" "QnrD1"
## Completed processing for Hospital_Type with case Branch
In this section industrial factor importance for changes in diversity are evaluated.
Show what are factors used.
colnames(sample_data(ps_amr_CSS))[7:12]
## [1] "Industrial_wastewater_impact"
## [2] "Industrial_wastewater_impact_from_food"
## [3] "Dairy_farming"
## [4] "Meat_production"
## [5] "Metal_processing"
## [6] "Washrooms"
# remove NA values
ps_amr_ww_impact = subset_samples(ps_scaffolds_filtered, sample_data(ps_scaffolds_filtered)$Industrial_wastewater_impact != "Na")
alpha_indexes_ww_impact <- estimate_richness(ps_amr_ww_impact, split = TRUE, c("Shannon", "Simpson", "InvSimpson"))
kruskal.test(alpha_indexes_ww_impact$Shannon ~ sample_data(ps_amr_ww_impact)$Industrial_wastewater_impact)
##
## Kruskal-Wallis rank sum test
##
## data: alpha_indexes_ww_impact$Shannon by sample_data(ps_amr_ww_impact)$Industrial_wastewater_impact
## Kruskal-Wallis chi-squared = 4.7566, df = 3, p-value = 0.1905
Find which pairs are important
dunn_results <- dunn.test(alpha_indexes_ww_impact$Shannon,
sample_data(ps_amr_ww_impact)$Industrial_wastewater_impact,
method="bh")
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 4.7566, df = 3, p-value = 0.19
##
##
## Comparison of x by group
## (Benjamini-Hochberg)
## Col Mean-|
## Row Mean | High Low None
## ---------+---------------------------------
## Low | 0.797924
## | 0.2549
## |
## None | -0.218217 -1.049901
## | 0.4136 0.2203
## |
## Seasonal | -1.366001 -2.114450 -1.187827
## | 0.2579 0.1034 0.2349
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
# remove NA values
ps_amr_ww_impact = subset_samples(ps_scaffolds_filtered, sample_data(ps_scaffolds_filtered)$Industrial_wastewater_impact_from_food != "Na")
alpha_indexes_ww_impact <- estimate_richness(ps_amr_ww_impact, split = TRUE, c("Shannon", "Simpson", "InvSimpson"))
kruskal.test(alpha_indexes_ww_impact$Shannon ~ sample_data(ps_amr_ww_impact)$Industrial_wastewater_impact_from_food)
##
## Kruskal-Wallis rank sum test
##
## data: alpha_indexes_ww_impact$Shannon by sample_data(ps_amr_ww_impact)$Industrial_wastewater_impact_from_food
## Kruskal-Wallis chi-squared = 9.2104, df = 3, p-value = 0.02662
Find which pairs are important
dunn_results <- dunn.test(alpha_indexes_ww_impact$Shannon,
sample_data(ps_amr_ww_impact)$Industrial_wastewater_impact_from_food,
method="bh")
## Kruskal-Wallis rank sum test
##
## data: x and group
## Kruskal-Wallis chi-squared = 9.2104, df = 3, p-value = 0.03
##
##
## Comparison of x by group
## (Benjamini-Hochberg)
## Col Mean-|
## Row Mean | High Low Medium
## ---------+---------------------------------
## Low | 0.676539
## | 0.2493
## |
## Medium | 1.756122 1.132277
## | 0.1186 0.1931
## |
## None | -0.880315 -1.708923 -2.949270
## | 0.2272 0.0875 0.0096*
##
## alpha = 0.05
## Reject Ho if p <= alpha/2
Municipalities were classefied by Dairy_farming, Meat_production, Metal_processing and car washing facilites connected to wastewater system.
wilcox.test(alpha_indexes_amr$Shannon~sample_data(ps_scaffolds_filtered)$Dairy_farming, p.adjust.method = "BH")
##
## Wilcoxon rank sum exact test
##
## data: alpha_indexes_amr$Shannon by sample_data(ps_scaffolds_filtered)$Dairy_farming
## W = 285, p-value = 0.1865
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(alpha_indexes_amr$Shannon~sample_data(ps_scaffolds_filtered)$Meat_production, p.adjust.method = "BH")
##
## Wilcoxon rank sum exact test
##
## data: alpha_indexes_amr$Shannon by sample_data(ps_scaffolds_filtered)$Meat_production
## W = 122, p-value = 0.5917
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(alpha_indexes_amr$Shannon~sample_data(ps_scaffolds_filtered)$Metal_processing, p.adjust.method = "BH")
##
## Wilcoxon rank sum exact test
##
## data: alpha_indexes_amr$Shannon by sample_data(ps_scaffolds_filtered)$Metal_processing
## W = 168, p-value = 0.398
## alternative hypothesis: true location shift is not equal to 0
wilcox.test(alpha_indexes_amr$Shannon~sample_data(ps_scaffolds_filtered)$Washrooms, p.adjust.method = "BH")
##
## Wilcoxon rank sum exact test
##
## data: alpha_indexes_amr$Shannon by sample_data(ps_scaffolds_filtered)$Washrooms
## W = 51, p-value = 0.01183
## alternative hypothesis: true location shift is not equal to 0
# Microbial Community Diversity Analysis Tutorial with Phyloseq
# Permanova
# https://deneflab.github.io/MicrobeMiseq/demos/mothur_2_phyloseq.html
# remove NAs
ps_amr_ww_impact = subset_samples(ps_amr_CSS, sample_data(ps_amr_CSS)$Industrial_wastewater_impact != "Na")
tax_bray_ww <- phyloseq::distance(ps_amr_ww_impact, method = "bray")
adonis2_rez<-adonis2(tax_bray_ww ~ sample_data(ps_amr_ww_impact)$Industrial_wastewater_impact)
print(adonis2_rez)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = tax_bray_ww ~ sample_data(ps_amr_ww_impact)$Industrial_wastewater_impact)
## Df SumOfSqs R2 F Pr(>F)
## Model 3 0.59619 0.21864 2.1452 0.026 *
## Residual 23 2.13067 0.78136
## Total 26 2.72686 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
From Food
# remove NAs
ps_amr_ww_impact = subset_samples(ps_amr_CSS, sample_data(ps_amr_CSS)$Industrial_wastewater_impact_from_food != "Na")
tax_bray_ww <- phyloseq::distance(ps_amr_ww_impact, method = "bray")
adonis2_rez<-adonis2(tax_bray_ww ~ sample_data(ps_amr_ww_impact)$Industrial_wastewater_impact_from_food)
print(adonis2_rez)
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = tax_bray_ww ~ sample_data(ps_amr_ww_impact)$Industrial_wastewater_impact_from_food)
## Df SumOfSqs R2 F Pr(>F)
## Model 3 0.65443 0.31571 3.3833 0.001 ***
## Residual 22 1.41848 0.68429
## Total 25 2.07291 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
metadata_col = colnames(sample_data(ps_amr_CSS))[9:12]
Show rest of the factors.
for (param in metadata_col){
# Using Bray-Curtis distance by default
if (param == "norm_factor" || param == "Sample" || param == "Date"){
next
}
print(param)
formula_str <- paste("tax_bray_amr ~", param)
adonis2_rez<-adonis2(as.formula(formula_str), data = data.frame(sample_data(ps_amr_CSS)))
# View results
print(adonis2_rez)
}
## [1] "Dairy_farming"
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = as.formula(formula_str), data = data.frame(sample_data(ps_amr_CSS)))
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.1853 0.03941 1.682 0.091 .
## Residual 41 4.5181 0.96059
## Total 42 4.7034 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## [1] "Meat_production"
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = as.formula(formula_str), data = data.frame(sample_data(ps_amr_CSS)))
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.1392 0.02959 1.2501 0.21
## Residual 41 4.5643 0.97041
## Total 42 4.7034 1.00000
## [1] "Metal_processing"
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = as.formula(formula_str), data = data.frame(sample_data(ps_amr_CSS)))
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.1102 0.02344 0.984 0.415
## Residual 41 4.5932 0.97656
## Total 42 4.7034 1.00000
## [1] "Washrooms"
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
##
## adonis2(formula = as.formula(formula_str), data = data.frame(sample_data(ps_amr_CSS)))
## Df SumOfSqs R2 F Pr(>F)
## Model 1 0.2435 0.05177 2.2386 0.033 *
## Residual 41 4.4599 0.94823
## Total 42 4.7034 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1